*************************************************************
*************************************************************
****** LOAD DATABASE
*************************************************************
*************************************************************

clear all
set more off
set matsize 11000
cd "/Users/sebastianfleitas/Dropbox/ReclassificationRisk/data_stata"
*cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles"

insheet using output_RF_vfixed.csv, comma
sort mbr_sys_id year
save output_RF.dta, replace 
clear all 

use database_by_members_n_choiceset.dta


****** CLEAN DATABASE VARIABLES THAT WE DON'T NEED AND ERRORS IN DATA
drop rx_dum*
sort  customer_number minor_market date_month alt_id
order customer_number minor_market date_month alt_id
drop if choice!=1 
drop if alt_id==""

****** CREATE VARIABLES FOR EXPENDITURE
gen paid_sum = paid_pharmacy + net_pd_amt
gen allowed_sum = allw_amt + allowed_pharma
gen outofpocketpharma = (allowed_pharma - paid_pharmacy)
gen outofpocketmedical = oop_dircomputed 
gen outpocket = (allowed_pharma - paid_pharmacy) + (allw_amt - net_pd_amt )

gen large_claim_50 = ( paid_sum > 50000)
gen large_claim_75 = ( paid_sum > 75000)
gen large_claim_100 = ( paid_sum > 100000)

**************************************
**************************************
**************************************
**** MERGING WITH RISK SCORES DATABASE 
sort mbr_sys_id year
merge mbr_sys_id year using NEW_riskscores_July2017_inc2015.dta
drop if _merge==2
drop _merge
**************************************
**************************************
**************************************


**************************************
****** THIS IS WITH RANDOM FOREST 
**************************************
sort mbr_sys_id year
merge mbr_sys_id year using output_RF.dta
drop if _merge==2
drop _merge
**************************************
rename pred_totexp_2012_108var_vfixed ORS_riskscore
gen FGL = 0.9127186 + (ORS_riskscore - 6486.558) * (1.412764/10497.09)
drop ORS_riskscore
rename FGL ORS_riskscore
sum acg_riskscore ORS_riskscore
**************************************
**************************************


/*
**************************************
*** ROBUSTNESS WITH 10 MONTHS 
sort mbr_sys_id year
merge mbr_sys_id year using NEW_riskscores_July2017_inc2015_10months.dta
drop if _merge==2
drop _merge
**************************************
drop acg_concurrent acg_riskscore 
rename acg_concurrent_10months acg_concurrent
rename acg_riskscore_10months acg_riskscore
exit
**************************************
*/



****** CREATE OTHER AUXILIARY VARIABLES 
tab year, gen(yeardum)
gen female = ( gdr_cd=="F")
tab minor_market, gen(marketdum)
gen aux=1 
* create the numer of people per firm and per plan
bys customer_number  date_month          : egen numpeople     = total(aux)
bys customer_number  date_month polnbr   : egen numpeopleplan = total(aux)
* Compute the average premium per person at firm-plan level
gen av_premiumpc_plan = (total_premium*12)/numpeopleplan
*********************************************************************************************

**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
***** DROP ERRORS IN MEMBERS (NOT MERGED)
egen auxgroup = group( customer_number mbr_sys_id )
duplicates tag auxgroup  year, generate(check)
drop if check==19
drop check
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************


*********************************************************************************************
** CLEAN NON COMPLETE OBSERVATIONS
*********************************************************************************************
sort customer_number year
merge customer_number year using firmsinsample_July2017.dta
*drop 8 firms without complete obervations for two years
drop if customer_number=="01H5518"	
drop if customer_number=="0253935"	
drop if customer_number=="0268272"
drop if customer_number=="0386663"
drop if customer_number=="0466732"
drop if customer_number=="06H2419"	
drop if customer_number=="0765480"	
drop if customer_number=="08M6991"
*drop if firmstays!=1
drop _merge
*********************************************************************************************
*********************************************************************************************
*********************************************************************************************

tab year, gen(yeardummy)

**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
*********************************************************************************************
******** HERE ALL THE REGRESSIONS AT THE INDIVIDUAL LEVEL FOR EMPLOYEES
*********************************************************************************************
xtset mbr_sys_id year
set more off
gen laggedscore = L.acg_riskscore
gen laggedscore_ORS = L.ORS_riskscore

**********************************************************************************************
**********************************************************************************************
******************** DESCRIPTIVE STATISTICS FOR FINAL SAMPLE 
**********************************************************************************************
**********************************************************************************************

xtset mbr_sys_id year

gen lagged_code_hypertension = L.code_hypertension 
gen lagged_code_heartfailure = L.code_heartfailure 
gen lagged_code_chronickidneydisease = L.code_chronickidneydisease 
gen lagged_code_atrialfibrilization= L.code_atrialfibrilization 
gen lagged_code_asthma =L.code_asthma

gen lagged_chronic_cancer     = L.code_cancer
gen lagged_chronic_transplant = L.code_transplant
gen lagged_chronic_ami        = L.code_ami
gen lagged_chronic_diabetes   = L.code_diabetes

gen lagged_code_brainhemorr = L.code_brainhemorr
gen laggeed_code_respfailure = L.code_respfailure


****************************************************
****** CREATING Rp BEFORE DROPPING DATA 2012 ******* 
****************************************************

bys customer_number  year        : egen mean_pred_riskscore_rp_aux = mean(acg_riskscore)
xtset mbr_sys_id year
gen mean_pred_riskscore_rp = L.mean_pred_riskscore_rp_aux

bys customer_number  year        : egen mean_ORS_riskscore_rp_aux = mean(ORS_riskscore)
xtset mbr_sys_id year
gen mean_ORS_riskscore_rp = L.mean_ORS_riskscore_rp_aux

bys prime_plan_code  year        : egen mean_pred_policy_rp_aux = mean(acg_riskscore)
xtset mbr_sys_id year
gen mean_pred_policy_rp = L.mean_pred_policy_rp_aux

bys prime_plan_code  year        : egen mean_ORS_policy_rp_aux = mean(ORS_riskscore)
xtset mbr_sys_id year
gen mean_ORS_policy_rp  = L.mean_ORS_policy_rp_aux
****************************************************
****************************************************



****************************************************




****************************************************
**** Other variables 
****************************************************
gen laggedclaims = L.paid_sum
gen laggedoutpocket = L.outpocket
gen laggedallowed_sum = L.allowed_sum

gen aux_auxpremium2012 = av_premiumpc_plan if year==2012
bys customer_number: egen auxpremium2012 = mean(aux_auxpremium2012)

drop if  firmstays!=1
drop if year==2012
tab rel_cd , gen(relationship)

egen in2012 = total(laggedclaims!=. & year==2013), by(mbr_sys_id)
egen in2013 = total(year==2013), by(mbr_sys_id)
egen in2014 = total(year==2014), by(mbr_sys_id)
egen in2015 = total(year==2015), by(mbr_sys_id)
gen only2013 = (in2015!=1 & in2014!=1 & in2013==1 & in2012!=1)
gen only2014 = (in2015!=1 & in2014==1 & in2013!=1 & in2012!=1)
gen only2015 = (in2015==1 & in2014!=1 & in2013!=1 & in2012!=1)
gen exjoiners_201314no2012 =      (in2015!=1 & in2014==1 & in2013==1 & in2012!=1)
gen exquitters_201213no2014 =     (in2015!=1 & in2014!=1 & in2013==1 & in2012==1)
gen exstayers_20121314no2015 =    (in2015!=1 & in2014==1 & in2013==1 & in2012==1)
gen newjoiners2013_stayers2015 =  (in2015==1 & in2014==1 & in2013==1 & in2012!=1)
gen newjoiners2014 =              (in2015==1 & in2014==1 & in2013!=1 & in2012!=1)
gen newstayers_allyears =         (in2015==1 & in2014==1 & in2013==1 & in2012==1)
gen everyone_onlyoneyear_aux = only2013 + only2014 + only2015
gen nocathegory = 1- everyone_onlyoneyear_aux - exjoiners_201314no2012 - exquitters_201213no2014 - exstayers_20121314no2015 - newjoiners2013_stayers2015 - newjoiners2014 - newstayers_allyears
gen everyone_onlyoneyear_scattered = nocathegory + everyone_onlyoneyear_aux


bys cust_seg_nbr year: egen sd_age = sd(age)
egen mean_sd_age = mean(sd_age)

bys cust_seg_nbr year: egen sd_acg_riskscore = sd(acg_riskscore)
egen mean_sd_acg_riskscore = mean(sd_acg_riskscore)

browse mean_sd_age mean_sd_acg_riskscore

**********************************************************************************************
**********************************************************************************************

egen firm = group(customer_number)
egen market = group(minor_market)
quietly: tab(market), gen(dum_market)
quietly: tab(age), gen(dum_age)
quietly: tab(sic_cd), gen(dum_sector)


gen hhi2013=0
replace hhi2013 =  6668  if minor_market=="DELAWARE MINOR"
replace hhi2013 =  1927  if minor_market=="ARKANSAS MINOR"
replace hhi2013 =  3158  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace hhi2013 =  1398  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace hhi2013 =  3313  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace hhi2013 =  7159  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace hhi2013 =  4087  if minor_market=="OKLAHOMA MINOR"
replace hhi2013 =  4895  if minor_market=="TENNESSEE MINOR"
replace hhi2013 =  5165  if minor_market=="WISCONSIN MINOR"
replace hhi2013 =  4104  if minor_market=="WYOMING MINOR"

gen hhi2014=0
replace hhi2014 = 5631   if minor_market=="DELAWARE MINOR"
replace hhi2014 = 4816  if minor_market=="ARKANSAS MINOR"
replace hhi2014 = 3401  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace hhi2014 = 1624  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace hhi2014 = 4075 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace hhi2014 = 2700 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace hhi2014 = 4633  if minor_market=="OKLAHOMA MINOR"
replace hhi2014 = 4968 if minor_market=="TENNESSEE MINOR"
replace hhi2014 = 1638  if minor_market=="WISCONSIN MINOR"
replace hhi2014 = 4320 if minor_market=="WYOMING MINOR"

gen hhi2015=0
replace hhi2015 = 5385 if minor_market=="DELAWARE MINOR"
replace hhi2015 = 5236 if minor_market=="ARKANSAS MINOR"
replace hhi2015 = 3715 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace hhi2015 = 2315  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace hhi2015 = 4714 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace hhi2015 = 2912 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace hhi2015 = 5460  if minor_market=="OKLAHOMA MINOR"
replace hhi2015 = 4744 if minor_market=="TENNESSEE MINOR"
replace hhi2015 = 1443  if minor_market=="WISCONSIN MINOR"
replace hhi2015 = 5160 if minor_market=="WYOMING MINOR"

gen leader2013=0
replace leader2013 =  0.81  if minor_market=="DELAWARE MINOR"
replace leader2013 =  0.23 if minor_market=="ARKANSAS MINOR"
replace leader2013 =  0.48 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace leader2013 =  0.21  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace leader2013 =  0.54 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace leader2013 =  0.84 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace leader2013 =  0.6 if minor_market=="OKLAHOMA MINOR"
replace leader2013 =  0.67 if minor_market=="TENNESSEE MINOR"
replace leader2013 =  0.7 if minor_market=="WISCONSIN MINOR"
replace leader2013 =  0.6 if minor_market=="WYOMING MINOR"

gen leader2014=0
replace leader2014 = 0.69  if minor_market=="DELAWARE MINOR"
replace leader2014 = 0.65  if minor_market=="ARKANSAS MINOR"
replace leader2014 = 0.52 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace leader2014 = 0.23  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace leader2014 = 0.6  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace leader2014 = 0.44 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace leader2014 = 0.65  if minor_market=="OKLAHOMA MINOR"
replace leader2014 = 0.67 if minor_market=="TENNESSEE MINOR"
replace leader2014 = 0.28  if minor_market=="WISCONSIN MINOR"
replace leader2014 = 0.62  if minor_market=="WYOMING MINOR"

gen leader2015=0
replace leader2015 = 0.65  if minor_market=="DELAWARE MINOR"
replace leader2015 = 0.69  if minor_market=="ARKANSAS MINOR"
replace leader2015 = 0.55  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace leader2015 = 0.34   if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace leader2015 = 0.66  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace leader2015 = 0.45  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace leader2015 = 0.72  if minor_market=="OKLAHOMA MINOR"
replace leader2015 = 0.65  if minor_market=="TENNESSEE MINOR"
replace leader2015 = 0.29  if minor_market=="WISCONSIN MINOR"
replace leader2015 = 0.69  if minor_market=="WYOMING MINOR"

gen numfirms5more2013=0
replace numfirms5more2013 = 2  if minor_market=="DELAWARE MINOR"
replace numfirms5more2013 = 5  if minor_market=="ARKANSAS MINOR"
replace numfirms5more2013 = 4  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace numfirms5more2013 = 7  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace numfirms5more2013 = 3  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace numfirms5more2013 = 2  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace numfirms5more2013 = 4  if minor_market=="OKLAHOMA MINOR"
replace numfirms5more2013 = 3  if minor_market=="TENNESSEE MINOR"
replace numfirms5more2013 = 3 if minor_market=="WISCONSIN MINOR"
replace numfirms5more2013 = 4 if minor_market=="WYOMING MINOR"

gen numfirms5more2014=0
replace numfirms5more2014 = 2  if minor_market=="DELAWARE MINOR"
replace numfirms5more2014 = 3  if minor_market=="ARKANSAS MINOR"
replace numfirms5more2014 = 4 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace numfirms5more2014 = 7 if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace numfirms5more2014 = 4 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace numfirms5more2014 = 5  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace numfirms5more2014 = 4  if minor_market=="OKLAHOMA MINOR"
replace numfirms5more2014 = 3  if minor_market=="TENNESSEE MINOR"
replace numfirms5more2014 = 4 if minor_market=="WISCONSIN MINOR"
replace numfirms5more2014 = 4  if minor_market=="WYOMING MINOR"

gen numfirms5more2015=0
replace numfirms5more2015 = 2  if minor_market=="DELAWARE MINOR"
replace numfirms5more2015 = 3  if minor_market=="ARKANSAS MINOR"
replace numfirms5more2015 = 4 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace numfirms5more2015 = 5 if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace numfirms5more2015 = 4 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace numfirms5more2015 = 5  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace numfirms5more2015 = 4  if minor_market=="OKLAHOMA MINOR"
replace numfirms5more2015 = 3 if minor_market=="TENNESSEE MINOR"
replace numfirms5more2015 = 6 if minor_market=="WISCONSIN MINOR"
replace numfirms5more2015 = 4  if minor_market=="WYOMING MINOR"







**********************************************************************************************
*************************   AGREGATE THE DATA AT THE FIRM LEVEL ******************************
**********************************************************************************************

set more off

*compute the mean of age and gender in the pool 
bys customer_number  year        : egen mean_age = mean(age)
bys customer_number  year        : egen mean_female = mean(female)

gen employee = (rel_cd=="EE")
bys customer_number  year        : egen mean_employee = mean(employee)

gen spouse = (rel_cd=="SP")
bys customer_number  year        : egen mean_spouse = mean(spouse)

gen children = (rel_cd=="CH")
bys customer_number  year        : egen mean_children = mean(children)

gen others = (rel_cd=="NB" | rel_cd=="SC" | rel_cd=="SD" )
bys customer_number  year        : egen mean_others = mean(others)


**************************************************************************************
**************************************************************************************
**************************************************************************************
**** COMPUTE premiums, claims and risk scores from per person variables 
**************************************************************************************
**************************************************************************************
**************************************************************************************


**************************************************************************************
**************************************************************************************
**** CREATE VARIABLES R 
**************************************************************************************
**************************************************************************************
bys customer_number  year        : egen mean_pred_riskscore = mean(laggedscore)
bys customer_number  year        : egen mean_ORS_riskscore = mean(laggedscore_ORS)

bys prime_plan_code  year        : egen mean_pred_policy = mean(laggedscore)
bys prime_plan_code  year        : egen mean_ORS_policy = mean(laggedscore_ORS)

bys customer_number  year        : egen mean_pred_riskscore_sd = sd(laggedscore)
replace mean_pred_riskscore_sd=0 if mean_pred_riskscore_sd==.
**************************************************************************************
**************************************************************************************
**************************************************************************************



******* USIC VARIABLES 
**************************************************************************************
bys customer_number  year        : egen mean_pred_USIC = mean(lagged_usic_score)
**************************************************************************************


**************************************************************************************
bys customer_number  year        : egen mean_pred_riskscore_current = mean(acg_riskscore)
bys customer_number  year        : egen mean_pred_riskscore_stayers_aux = mean(laggedscore) if newstayers_allyears==1
bys customer_number  year        : egen mean_pred_riskscore_stayers = mean(mean_pred_riskscore_stayers_aux) 
****
bys customer_number  year        : egen mean_paid           = mean(laggedclaims)
bys prime_plan_code  year        : egen mean_paid_policy    = mean(laggedclaims)

bys customer_number  year        : egen mean_outpocket           = mean(laggedoutpocket)
bys customer_number  year        : egen mean_allowed_sum           = mean(laggedallowed_sum)
****
bys customer_number  year        : egen mean_paid_current           = mean(paid_sum)
bys customer_number  year        : egen mean_premium        = mean(av_premiumpc_plan)
bys prime_plan_code  year        : egen mean_premium_policy        = mean(av_premiumpc_plan)

gen aux11=1
bys prime_plan_code  year        : egen people_policy        = sum(aux11)
bys customer_number  year        : egen people_plan        = sum(aux11)

histogram people_policy
histogram people_plan

*********************
sort mbr_sys_id year
gen stayer = 1 if mbr_sys_id[_n] == mbr_sys_id[_n-1] & customer_number[_n]==customer_number[_n-1]
replace stayer=0 if stayer==.
bys customer_number year : egen percentage_stayers_aux = sum(stayer)
gen percentage_stayers = percentage_stayers_aux/numpeople
*********************

************************************************************************************************
saveold database_individual_level.dta, replace 
************************************************************************************************


*************************************************************************************************
** AUXILIARY CODE FOR STANDARD ERRORS 
*************************************************************************************************
set more off
**************************************************************************************************
/*
*/
**************************************************************************************************

*** Run regression to create residuals (second stage) 

*xtreg mean_premium mean_pred_riskscore_rp prob_leaving* yeardum* if year>2013 ,fe
*predict eps1 , e

reg  mean_premium mean_pred_riskscore_rp prob_leaving* yeardum* if year<2015
predict eps1 , res

gen n_obsreg = e(N)
scalar dh_dv1 = _b[prob_leaving] 
scalar dh_dv2 =  2* _b[prob_leaving2]* prob_leaving
scalar dh_dv3 =  3 * _b[prob_leaving3]* prob_leaving^2
scalar dh_dv4 =  4 * _b[prob_leaving4]* prob_leaving^3
scalar dh_dv5 =  5 * _b[prob_leaving5]* prob_leaving^4
scalar dh_dv6 =  6 * _b[prob_leaving6]* prob_leaving^5
gen partialpol1 = dh_dv1 + dh_dv2 + dh_dv3 + dh_dv4 + dh_dv5 + dh_dv6

*** Geneate M for polynomial 6 (second stage without y variable)  

***** CHANGE THIS
*xtreg    mean_pred_riskscore_rp prob_leaving* yeardum* if year<2015   ,fe
*predict upol1, e

reg mean_pred_riskscore_rp prob_leaving* yeardum* if year>2013
predict upol1, res

gen upol1_sq  = upol1^2
sum upol1_sq
scalar Mpol1 = r(sum) 

*generate variables with the derivaties of partial v partial alpha 
forvalues i=1/15{
gen partialv_partialalpha`i' = normalden(linear_index) * coeffprobit`i'
}

*generate H
forvalue i=1/15{
gen Haux_pol1_`i' =   upol1 * partialpol1 *  partialv_partialalpha`i'
sum Haux_pol1_`i' 
scalar e`i' = r(mean)
}
matrix H = (e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15)

*generate HVH'    
matrix DELTA_aux = H*V*H' 
scalar DELTA = DELTA_aux[1,1]

gen adjusting_factor = DELTA/ Mpol1
sum adjusting_factor



**************************************************************************************************
*************** TABLES APPENDIX 
**************************************************************************************************

**************************************************************************************************
**** COMPETITION RESULTS
**************************************************************************************************
**** CREATE COMPETITION VARIALES 
gen hhi = hhi2013 if year==2013
replace hhi=hhi2014 if year==2014
replace hhi=hhi2015 if year==2015
gen leader = leader2013 if year==2013
replace leader= leader2014 if year==2014
replace leader= leader2015 if year==2015
gen numfirms5more = numfirms5more2013 if year==2013
replace numfirms5more=numfirms5more2014 if year==2014
replace numfirms5more=numfirms5more2015 if year==2015

*create interacted variables 
gen hhi_interacted_score = hhi * mean_pred_riskscore
gen hhi_interacted_score_rp  = hhi * mean_pred_riskscore_rp
gen leader_interacted_score = leader * mean_pred_riskscore
gen leader_interacted_score_rp = leader * mean_pred_riskscore_rp
gen numfirms5more_int = numfirms5more * mean_pred_riskscore
gen numfirms5more_intrp = numfirms5more * mean_pred_riskscore_rp


**************************************************************************************************
**************************************************************************************************
**********************  END OF INDIVIDUAL LEVEL DATA *********************************************
**************************************************************************************************
**************************************************************************************************

*************************************************************************************************
**************************************************************************************************
**************************************************************************************************
**************************************************************************************************
*COLLAPSE THE DATABASE (create a local to denominate variables and collapse)
**************************************************************************************************
**************************************************************************************************
**************************************************************************************************
**************************************************************************************************

*local variables " mean_pred_USIC mean_pred_USIC_rp mean_spouse mean_children mean_others percentage_stayers large_claim_50 large_claim_75 large_claim_100 mean_pred_riskscore_sd lagged_code_hypertension lagged_code_heartfailure lagged_code_chronickidneydisease lagged_code_atrialfibrilization lagged_code_asthma mean_paid_current mean_pred_riskscore_current par2 par mean_employee hhi2013 hhi2014 hhi2015 leader2013 leader2014 leader2015 numfirms5more2013 numfirms5more2014 numfirms5more2015 lagged_chronic_diabetes lagged_chronic_transplant lagged_chronic_ami lagged_chronic_cancer mean_paid mean_outpocket mean_allowed_sum mean_pred_riskscore_rp mean_pred_riskscore mean_pred_riskscore_stayers mean_premium mean_age mean_female numpeople code_* relationship2"
local variables " mean_paid_policy mean_premium_policy mean_pred_policy mean_ORS_policy mean_pred_policy_rp mean_ORS_policy_rp mean_ORS_riskscore mean_ORS_riskscore_rp mean_spouse mean_children mean_others percentage_stayers large_claim_50 large_claim_75 large_claim_100 mean_pred_riskscore_sd lagged_code_hypertension lagged_code_heartfailure lagged_code_chronickidneydisease lagged_code_atrialfibrilization lagged_code_asthma mean_paid_current mean_pred_riskscore_current par2 par mean_employee hhi2013 hhi2014 hhi2015 leader2013 leader2014 leader2015 numfirms5more2013 numfirms5more2014 numfirms5more2015 lagged_chronic_diabetes lagged_chronic_transplant lagged_chronic_ami lagged_chronic_cancer mean_paid mean_outpocket mean_allowed_sum mean_pred_riskscore_rp mean_pred_riskscore mean_pred_riskscore_stayers mean_premium mean_age mean_female numpeople code_* relationship2 auxpremium2012"
local byvariables "customer_number  year  minor_market"
keep `variables' `byvariables'
collapse (mean) `variables', by(`byvariables')

*generate size of employers 
bys customer_number: egen max_numpeople =  max(numpeople) 
gen size = 1 if max_numpeople<=7
replace size = 2 if max_numpeople>7 & max_numpeople<=13
replace size = 3 if max_numpeople>13 & max_numpeople<=28
replace size = 4 if max_numpeople>28 

*auxiliary variables for regression
gen lpremium = log(mean_premium)
gen lscore = log(mean_pred_riskscore)
tab minor_market, gen(marketdum)
egen firm = group( customer_number ) 
tab year, gen(yeardum)
xtset firm year
***************************************************
gen numpeople2 = numpeople
replace numpeople2= 100 if numpeople>=100
label variable numpeople2  "Number of enrollees" 
histogram numpeople2,  ylabel(0(1000)4000, grid format(%5.0fc)) freq scheme(s1color)

sum mean_age mean_female mean_employee  lagged_chronic_cancer lagged_chronic_ami lagged_chronic_transplant lagged_chronic_diabetes mean_premium mean_pred_riskscore D. mean_pred_riskscore

sum mean_premium if year==2013
sum mean_premium if year==2014
sum mean_premium if year==2015

sum D. mean_pred_riskscore if year==2014
sum D. mean_pred_riskscore if year==2015

***************************************************************
***************************************************************
** HERE IS WHERE WE USE THE DATA FOR THE FIRM LEVEL 
***************************************************************
***************************************************************
cd "/Users/sebastianfleitas/Dropbox/ReclassificationRisk/dataset_revisions"
saveold database_firm_level.dta, replace 
